from scrapy_redis.spiders import RedisSpider
from douban.items import *
import random,time

class DoubanRedisSpider(RedisSpider):
    """Spider that reads its start URLs from a redis queue.

    The queue key is ``redis_key`` (``douban_redis_spider:start_urls``).
    Every fetched page is scanned for the links inside ``td.title`` table
    cells, and one ``DoubanItem`` is emitted per link.
    """
    name = 'douban_redis_spider'
    allowed_domains = ['www.douban.com']
    redis_key = 'douban_redis_spider:start_urls'
    # Base listing URL and page counter. Neither is used by the live code
    # below; they belong to the disabled pagination scheme noted in parse().
    url = 'https://www.douban.com/group/shanghaizufang/discussion?start='
    page = 1

    def parse(self, response):
        """Yield one ``DoubanItem`` per topic link found in ``response``.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Yields:
            DoubanItem: With ``title`` and ``href`` taken from the same
            ``<a>`` element.
        """
        print('开始爬取...')
        # Read both attributes from the same anchor node. Extracting titles
        # and hrefs as two separate lists and pairing them by index goes
        # wrong (misaligned pairs or IndexError) as soon as one anchor lacks
        # either attribute.
        for anchor in response.xpath('//table//td[@class="title"]/a'):
            title = anchor.xpath('@title').extract_first()
            href = anchor.xpath('@href').extract_first()
            if title is None or href is None:
                # Incomplete link: nothing meaningful to store.
                continue
            item = DoubanItem()
            item['title'] = title
            item['href'] = href
            yield item
        # NOTE: in-spider pagination is intentionally disabled. An earlier
        # version incremented self.page and, for pages up to 10, requested
        # self.url + str(25 * (self.page - 1)) with this method as callback,
        # after a random 1-2 second sleep. Presumably further page URLs are
        # now pushed to the redis queue instead -- TODO confirm.